# Package setup.
# corpcor and mctest are installed only when they are not already available,
# so re-running the script does not hit the network or reinstall packages on
# every execution.
for (pkg in c("corpcor", "mctest")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later at the first function call.
library(mgcv)
library(corpcor)
library(mctest)


# Importing datasets
all_sellerdata <- read.csv(
  file = "C:\\Users\\f00456n\\Documents\\Amazon Price Dynamics\\Data Scraped\\New Data\\Modeling data sets\\Microwave\\finalnumsellers_modeldata_microwave.csv",
  header = TRUE,
  sep = ","
)

# Rows 4..133 are used for modelling. Indexing a data frame past its last row
# returns all-NA rows, which the zero-fill below would silently turn into
# fabricated all-zero observations, so fail early if the file is too short.
if (nrow(all_sellerdata) < 133) {
  stop(
    "Expected at least 133 rows in the seller data, found ",
    nrow(all_sellerdata),
    call. = FALSE
  )
}
modeldata <- all_sellerdata[4:133, ]

# Missing values are replaced by 0 in every column.
modeldata[is.na(modeldata)] <- 0

# The rest of the script refers to modeldata columns by bare name (both when
# assembling predictor frames and inside the model formulas), so the data
# frame has to stay on the search path.
attach(modeldata)

# Kenmore: Number of sellers
# Candidate predictor columns for the Kenmore model, in the order they appear
# in the resulting data frame.
predictors_knm <- c(
  "incr10per_clust3_knmore",
  "incr20per_clust3_knmore",
  "incr5per_clust3_knmore",
  "decr10per_clust3_knmore",
  "decr20per_clust3_knmore",
  "decr5per_clust3_knmore",
  "incr10per_clust1_knmore",
  "incr20per_clust1_knmore",
  "incr5per_clust1_knmore",
  "decr10per_clust1_knmore",
  "decr20per_clust1_knmore",
  "decr5per_clust1_knmore",
  "buyboxprice_knmore_lag1",
  "nonbbox3pprice_knmore_lag",
  "knmore_bottombrandlag1",
  "knmore_topbrandlag1",
  "weekend",
  "priceclust1_knmore_lag",
  "priceclust3_knmore_lag",
  # more additional variables - in absence of review text
  "Answered_Questionsknmore_lag1",
  "Product_reviews_knmore_lag1",
  "product_star_knmore_lag1",
  "salerank_knmore_lag1",
  "salerank_subcat_knmore_lag1",
  "num_knmoreused3p_lag1",
  "num_knmoreusedamzn_lag1",
  "seasonal_sale"
)

# Columns are selected from modeldata by name, so this step does not depend on
# attach()ed globals, fails with "undefined columns selected" on a misspelt
# name, and keeps each column's own type (cbind() would coerce every column to
# one common type). drop = FALSE guarantees a data frame.
data_numsellers_knm <- modeldata[, predictors_knm, drop = FALSE]

# modeldata carries the row labels of the rows it was sliced from; reset them
# to 1..n.
rownames(data_numsellers_knm) <- NULL

# VARIANCE CHECK
# Sample variance of every candidate predictor column (one value per column).
variance_X <- vapply(data_numsellers_knm, var, numeric(1))

# A column is retained only when its variance is known and non-zero. The
# explicit NA test keeps an undefined variance from producing an NA column
# index, which would make the subsetting below fail.
has_variance <- !is.na(variance_X) & variance_X != 0

nonzero_var <- sum(has_variance) # number of variables with non-zero variance

# Retaining variables with non-zero variance.
# drop = FALSE keeps the result a data frame even when only one column
# survives; without it the result would collapse to a bare vector.
trainX_retained <- data_numsellers_knm[, has_variance, drop = FALSE]

data_numsellers_knm <- trainX_retained

# Kenmore: model for the number of sellers.
# Gaussian family with identity link, fitted by REML; every term enters
# linearly (the formula contains no smooth terms).
# The response NUM_SELLERS_KNM is not a column of data_numsellers_knm, so it
# is resolved outside `data` (presumably from the attached modeldata).
# Candidate terms currently left out of the formula:
#   decr10per_clust1_knmore, decr5per_clust3_knmore, incr5per_clust3_knmore,
#   incr10per_clust3_knmore, num_knmoreused3p_lag1, weekend,
#   product_star_knmore_lag1, salerank_subcat_knmore_lag1,
#   num_prchngknmoreused3p_lag1, psonic_topbrandlag1
gam_numsellers_knm <- gam(
  NUM_SELLERS_KNM ~
    Answered_Questionsknmore_lag1 +
    buyboxprice_knmore_lag1 +
    nonbbox3pprice_knmore_lag +
    decr10per_clust3_knmore +
    knmore_bottombrandlag1 +
    priceclust1_knmore_lag +
    priceclust3_knmore_lag +
    Product_reviews_knmore_lag1 +
    salerank_knmore_lag1 +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_knm,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Coefficient table and fit statistics for the Kenmore model.
summary(gam_numsellers_knm)
                         
                           
# LG: Number of sellers
# Candidate predictor columns for the LG model, in the order they appear in
# the resulting data frame.
predictors_lg <- c(
  "incr10per_amzn_lg",
  "incr20per_amzn_lg",
  "incr5per_amzn_lg",
  "decr10per_amzn_lg",
  "decr20per_amzn_lg",
  "decr5per_amzn_lg",
  "buyboxprice_lg_lag1",
  "lg_bottombrandlag1",
  "lg_topbrandlag1",
  "priceAmzn_lg_lag",
  "weekend",
  # more additional variables - in absence of review text
  "Answered_Questionslg_lag1",
  "Product_reviews_lg_lag1",
  "product_star_lg_lag1",
  "salerank_lg_lag1",
  "salerank_subcat_lg_lag1",
  "num_lgused3p_lag1",
  "num_lgusedamzn_lag1",
  "seasonal_sale"
)

# Columns are selected from modeldata by name, so this step does not depend on
# attach()ed globals, fails with "undefined columns selected" on a misspelt
# name, and keeps each column's own type (cbind() would coerce every column to
# one common type). drop = FALSE guarantees a data frame.
data_numsellers_lg <- modeldata[, predictors_lg, drop = FALSE]

# modeldata carries the row labels of the rows it was sliced from; reset them
# to 1..n.
rownames(data_numsellers_lg) <- NULL


# VARIANCE CHECK
# Sample variance of every candidate predictor column (one value per column).
variance_X <- vapply(data_numsellers_lg, var, numeric(1))

# A column is retained only when its variance is known and non-zero. The
# explicit NA test keeps an undefined variance from producing an NA column
# index, which would make the subsetting below fail.
has_variance <- !is.na(variance_X) & variance_X != 0

nonzero_var <- sum(has_variance) # number of variables with non-zero variance

# Retaining variables with non-zero variance.
# drop = FALSE keeps the result a data frame even when only one column
# survives; without it the result would collapse to a bare vector.
trainX_retained <- data_numsellers_lg[, has_variance, drop = FALSE]

data_numsellers_lg <- trainX_retained


# LG: model for the number of sellers.
# Gaussian family with identity link, fitted by REML; every term enters
# linearly (the formula contains no smooth terms).
# The response NUM_SELLERS_LG and the terms nonbbox3pprice_lg_lag and
# priceAmzn_lg are not columns of data_numsellers_lg, so they are resolved
# outside `data` (presumably from the attached modeldata).
# Candidate terms currently left out of the formula:
#   priceAmzn_lg_lag, decr5per_amzn_lg, product_star_lg_lag1,
#   incr5per_amzn_lg
gam_numsellers_lg <- gam(
  NUM_SELLERS_LG ~
    Answered_Questionslg_lag1 +
    buyboxprice_lg_lag1 +
    nonbbox3pprice_lg_lag +
    decr10per_amzn_lg +
    priceAmzn_lg +
    Product_reviews_lg_lag1 +
    salerank_subcat_lg_lag1 +
    incr10per_amzn_lg +
    num_lgusedamzn_lag1 +
    weekend +
    salerank_lg_lag1 +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_lg,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Coefficient table and fit statistics for the LG model.
summary(gam_numsellers_lg)

# PANASONIC: Number of sellers
# Candidate predictor columns for the Panasonic model, in the order they
# appear in the resulting data frame.
predictors_psonic <- c(
  "incr10per_amzn_psonic",
  "incr20per_amzn_psonic",
  "incr5per_amzn_psonic",
  "decr10per_amzn_psonic",
  "decr20per_amzn_psonic",
  "decr5per_amzn_psonic",
  "incr10per_clust2_psonic",
  "incr20per_clust2_psonic",
  "incr5per_clust2_psonic",
  "decr10per_clust2_psonic",
  "decr20per_clust2_psonic",
  "decr5per_clust2_psonic",
  "incr10per_clust4_psonic",
  "incr20per_clust4_psonic",
  "incr5per_clust4_psonic",
  "decr10per_clust4_psonic",
  "decr20per_clust4_psonic",
  "decr5per_clust4_psonic",
  "meanbuyboxprice_psonic_lag1",
  "mean_nonbbox3pprice_psonic_lag",
  "psonic_bottombrandlag1",
  "psonic_topbrandlag1",
  "priceAmzn_psonic_lag",
  "weekend",
  # more additional variables - in absence of review text
  "meanans_questions_psonic_lag1",
  "meanProduct_reviews_psonic_lag1",
  "meanprodstar_psonic_lag1",
  "meansalesrank_categ_psonic_lag1",
  "meansalesrank_subcat_psonic_lag1",
  "NUM_psonicused3p_lag1",
  "NUM_psonicusedamzn_lag1",
  "seasonal_sale"
)

# Columns are selected from modeldata by name, so this step does not depend on
# attach()ed globals, fails with "undefined columns selected" on a misspelt
# name, and keeps each column's own type (cbind() would coerce every column to
# one common type). drop = FALSE guarantees a data frame.
data_numsellers_psonic <- modeldata[, predictors_psonic, drop = FALSE]

# modeldata carries the row labels of the rows it was sliced from; reset them
# to 1..n.
rownames(data_numsellers_psonic) <- NULL
                                       

# VARIANCE CHECK
# Sample variance of every candidate predictor column (one value per column).
variance_X <- vapply(data_numsellers_psonic, var, numeric(1))

# A column is retained only when its variance is known and non-zero. The
# explicit NA test keeps an undefined variance from producing an NA column
# index, which would make the subsetting below fail.
has_variance <- !is.na(variance_X) & variance_X != 0

nonzero_var <- sum(has_variance) # number of variables with non-zero variance

# Retaining variables with non-zero variance.
# drop = FALSE keeps the result a data frame even when only one column
# survives; without it the result would collapse to a bare vector.
trainX_retained <- data_numsellers_psonic[, has_variance, drop = FALSE]

data_numsellers_psonic <- trainX_retained

# Panasonic: model for the number of sellers.
# Gaussian family with identity link, fitted by REML; every term enters
# linearly (the formula contains no smooth terms).
# The response numsellers_PSN is not a column of data_numsellers_psonic, so it
# is resolved outside `data` (presumably from the attached modeldata).
# Candidate terms currently left out of the formula:
#   decr10per_amzn_psonic, decr20per_amzn_psonic, mean_buyboxprice_psonic,
#   mean_nonbbox3pprice_psonic, priceAmzn_psonic,
#   meansalesrank_categ_psonic_lag1, incr5per_clust4_psonic,
#   meanprodstar_psonic_lag1, meansalesrank_subcat_psonic_lag1
gam_numsellers_psonic <- gam(
  numsellers_PSN ~
    decr5per_amzn_psonic +
    meanans_questions_psonic_lag1 +
    meanProduct_reviews_psonic_lag1 +
    incr5per_amzn_psonic +
    meanbuyboxprice_psonic_lag1 +
    mean_nonbbox3pprice_psonic_lag +
    priceAmzn_psonic_lag +
    incr10per_amzn_psonic +
    weekend +
    incr20per_amzn_psonic +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_psonic,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Coefficient table and fit statistics for the Panasonic model.
summary(gam_numsellers_psonic)
                          